Intro Thoughts

Status Quo

library(tidyverse)

births_path <- "https://raw.githubusercontent.com/EvaMaeRey/tableau/9e91c2b5ee803bfef10d35646cf4ce6675b92b55/tidytuesday_data/2018-10-02-us_births_2000-2014.csv"

library(ggcalendar)

readr::read_csv(births_path) %>% 
  mutate(month = str_pad(month, 2, pad = "0"),
         date_of_month = str_pad(date_of_month, 2, pad = "0")) %>% 
  mutate(date = paste(year, month, date_of_month, sep = "-") %>% as_date()) %>% 
  mutate(ind_holiday = 
           (month == "12" & date_of_month %in% 24:31) |
           (month == "07" & date_of_month == "04") |
           (month == "01" & date_of_month == "01") | 
           (month == "10" & date_of_month == "31") | 
           (month == "11" & date_of_month %in% 20:30)
           ) |>
  mutate(date_in_2020 = paste(2020, month, date_of_month, sep = "-") %>% as_date()) |>
  mutate(ind_weekend = wday(date) == 1 | wday(date) == 7) |>
  mutate(ind_Feb_29th = month(date) == 2 & day(date) == 29) |>
  mutate(ind_13th = day(date) == 13) |>
  mutate(ind_Fri13th = wday(date) == 6 & day(date) == 13) ->
births_df
## Rows: 5479 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (5): year, month, date_of_month, day_of_week, births
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
births_df %>%
  filter(year == 2012) %>% 
  ggcalendar() + 
  aes(date = date) + 
  geom_point_calendar() +
  aes(size = births) +
  aes(color = births) +
  geom_text_calendar(aes(label = day(date)), color = "oldlace", size = 2) + 
  guides(
    colour = guide_legend("Births"),
    size = guide_legend("Births")
 ) + 
  geom_point_calendar(data = data.frame(date =
                                      as_date("2012-12-25")),
                      size = 5, color = "red", shape = 21) + 
  scale_color_viridis_c() + 
  labs(title = "The year in 2000 in births")

ggchalkboard:::geoms_chalk_on()

ggplot(births_df) + 
  aes(x = births) + 
  geom_histogram() + 
  ggxmean::geom_x_mean() +
  ggxmean::geom_x_mean_label() + 
  geom_rug(alpha = .2) + 
  labs(x = "Number of births") + 
  ggchalkboard::theme_chalkboard() + 
  labs(title = "Distribution of Number of Births in the U.S. each day from 2000-2014" %>% str_wrap(45))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Using the `size` aesthetic with geom_segment was deprecated in ggplot2 3.4.0.
## ℹ Please use the `linewidth` aesthetic instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

last_plot() + 
  facet_wrap(~ind2cat::ind_recode(ind_weekend), ncol = 1) + 
  labs(title = "We explore the bimodal distribution looking at 'weekend effects'" %>% str_wrap(45))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

last_plot() %+%
  facet_wrap(~ind2cat::ind_recode(ind_holiday), ncol = 1) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

(last_plot() + 
  facet_wrap(~ind2cat::ind_recode(ind_weekend), ncol = 1) + 
  labs(title = "We explore the bimodal distribution looking at 'weekend effects'" %>% str_wrap(45))) %+%
  (births_df |> filter(!ind_holiday))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

last_plot() + 
  facet_wrap(~ind2cat::ind_recode(ind_13th, rev = T), ncol = 1) + 
  labs(title = "We also look supersticion around the number 13 might impact number of births" %>% str_wrap(45))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

last_plot() + 
  facet_grid(wday(date, label = T) ~
               ind2cat::ind_recode(ind_13th, rev = T))  + 
  labs(title = "For fun, we break the data up by day of the week and 13th" %>% str_wrap(45))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

last_plot() + 
  facet_grid(wday(date, label = T) ~
               ind2cat::ind_recode(ind_Feb_29th)) + 
  labs(title = "Finally, we turn to Feb 29th - how does birth rate compare to other days?" %>% str_wrap(45))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

last_plot() %+%
  (births_df |> 
     filter(date_in_2020 >= as.Date("2020-02-26")) |>
     filter(date_in_2020 <= as.Date("2020-03-03")) 
   ) + 
  geom_rug() + 
  labs(title = "We narrow our comparisons to February 26 to March 3 in each year" %>% str_wrap(45)) +
  labs(subtitle = "Between Feb 27 and March 3rd by day of week and Feb 19 indicator " %>% str_wrap(55))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

births_df |>
  filter(!ind_holiday) |>
  filter(!ind_13th) |>
  ggplot() + 
  aes(x = date_in_2020, y = births) + 
  geom_line() + 
  aes(color = year) + 
  facet_wrap(~wday(date, label = T)) + 
  # geom_vline(xintercept = as.Date("2020-02-29")) + 
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## colour.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

lm(births ~ year + month, births_df)
## 
## Call:
## lm(formula = births ~ year + month, data = births_df)
## 
## Coefficients:
## (Intercept)         year      month02      month03      month04      month05  
##    76258.36       -32.56       236.40       215.86       115.10       264.21  
##     month06      month07      month08      month09      month10      month11  
##      565.34       812.54      1005.55      1090.30       495.22       221.76  
##     month12  
##      262.03
# ggchalkboard:::geoms_chalk_off()

births_df |> 
     filter(date_in_2020 >= as.Date("2020-02-24")) |>
     filter(date_in_2020 <= as.Date("2020-03-05")) |>
  ggplot() + 
  aes(x = date_in_2020, y  = births) + 
  geom_line(color = "black", alpha= .2) +
  geom_point(aes(shape = ind_weekend, 
                 color = ind_Feb_29th,
                 size = ind_Feb_29th)) +
  geom_text(aes(label = wday(date, label = T)), 
            vjust = -0.2) + 
  # geom_text(aes(label = births), 
  #           vjust = 1.2) + 
  facet_wrap(~year)
## Warning: Using size for a discrete variable is not advised.

last_plot() +
  aes(linetype = ind_weekend)
## Warning: Using size for a discrete variable is not advised.

Experiment

Closing remarks, Other Relevant Work, Caveats